Load Libraries
# Load tidyverse, anomalize, tibbletime and timetk
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(anomalize)
## == Use anomalize to improve your Forecasts by 50%! =============================
## Business Science offers a 1-hour course - Lab #18: Time Series Anomaly Detection!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(tibbletime)
##
## Attaching package: 'tibbletime'
## The following object is masked from 'package:stats':
##
## filter
library(timetk)
Reading data
# Import the sales CSV into a data frame and preview its first five rows.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file exists.
df <- read.csv(
  file = "C:/Users/user/Downloads/Supermarket_Sales_Forecasting - Sales.csv"
)
head(df, n = 5)
## Date Sales
## 1 1/5/2019 548.9715
## 2 3/8/2019 80.2200
## 3 3/3/2019 340.5255
## 4 1/27/2019 489.0480
## 5 2/8/2019 634.3785
# Show the structure (dimensions and column types) of the imported data
utils::str(df)
## 'data.frame': 1000 obs. of 2 variables:
## $ Date : chr "1/5/2019" "3/8/2019" "3/3/2019" "1/27/2019" ...
## $ Sales: num 549 80.2 340.5 489 634.4 ...
# Replace the Date column with one synthetic date per row, starting tomorrow,
# so that the rows form a gap-free daily index.
# NOTE(review): this discards the dates read from the CSV and makes the index
# depend on the day the script is run -- confirm this is intended rather than
# parsing the original column, e.g. as.Date(df$Date, format = "%m/%d/%Y").
# seq_len() keeps the assignment valid for a zero-row df (1:0 is c(1, 0)).
df$Date <- Sys.Date() + seq_len(nrow(df))

# Decompose Sales, flag anomalies in the remainder, rebuild the bounds on the
# original scale, and keep only the rows flagged as anomalies.
df %>%
  as_tibble() %>%
  time_decompose(Sales) %>%
  anomalize(remainder) %>%
  time_recompose() %>%
  filter(anomaly == "Yes")
## Converting from tbl_df to tbl_time.
## Auto-index message: index = Date
## frequency = 7 days
## trend = 91 days
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## # A time tibble: 0 x 10
## # Index: Date
## # ... with 10 variables: Date <date>, observed <dbl>, season <dbl>,
## # trend <dbl>, remainder <dbl>, remainder_l1 <dbl>, remainder_l2 <dbl>,
## # anomaly <chr>, recomposed_l1 <dbl>, recomposed_l2 <dbl>
Anomaly Detection
# Build the anomalized data set used by the steps below.
# The decomposition target must be the value column (Sales); passing Date
# decomposes the date index itself, so `observed` and `trend` end up holding
# date numbers and no sales anomaly can ever be detected.
# merge = TRUE keeps the original columns alongside the decomposition.
# as_tibble() replaces as.tibble(), deprecated since tibble 2.0.0.
df_anomalized <- df %>%
  as_tibble() %>%
  time_decompose(Sales, merge = TRUE) %>%
  anomalize(remainder) %>%
  time_recompose()
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Converting from tbl_df to tbl_time.
## Auto-index message: index = Date
## frequency = 7 days
## trend = 91 days
# Inspect the column types and leading values of the anomalized result
glimpse(df_anomalized)
## Rows: 1,000
## Columns: 11
## $ Date <date> 2022-02-08, 2022-02-09, 2022-02-10, 2022-02-11, 2022-02~
## $ Sales <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.3785, 627.616~
## $ observed <dbl> 19031, 19032, 19033, 19034, 19035, 19036, 19037, 19038, ~
## $ season <dbl> -9.527570e-13, -3.130033e-13, 8.005211e-13, 2.848151e-14~
## $ trend <dbl> 19031, 19032, 19033, 19034, 19035, 19036, 19037, 19038, ~
## $ remainder <dbl> 7.275958e-12, 7.275958e-12, 7.275958e-12, 7.275958e-12, ~
## $ remainder_l1 <dbl> -8.003553e-11, -8.003553e-11, -8.003553e-11, -8.003553e-~
## $ remainder_l2 <dbl> 7.275958e-11, 7.275958e-11, 7.275958e-11, 7.275958e-11, ~
## $ anomaly <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "N~
## $ recomposed_l1 <dbl> 19031, 19032, 19033, 19034, 19035, 19036, 19037, 19038, ~
## $ recomposed_l2 <dbl> 19031, 19032, 19033, 19034, 19035, 19036, 19037, 19038, ~
Visualizing anomalies
# Visualize the anomalies found in df_anomalized
plot_anomalies(df_anomalized, ncol = 5, alpha_dots = 0.2)

Interactive graphical anomaly representation
# Anomaly diagnostics plot of Sales over Date, produced with timetk
timetk::plot_anomaly_diagnostics(df, Date, Sales, .facet_ncol = 2)
## frequency = 7 observations per 1 week
## trend = 92 observations per 3 months
Finding the exact points that are anomalies
# Compute timetk's anomaly diagnostics for Sales over Date and list only the
# observations flagged as anomalies
df %>%
  timetk::tk_anomaly_diagnostics(Date, Sales) %>%
  filter(anomaly == "Yes")
## frequency = 7 observations per 1 week
## trend = 92 observations per 3 months
## # A tibble: 0 x 11
## # ... with 11 variables: Date <date>, observed <dbl>, season <dbl>,
## # trend <dbl>, remainder <dbl>, seasadj <dbl>, remainder_l1 <dbl>,
## # remainder_l2 <dbl>, anomaly <chr>, recomposed_l1 <dbl>, recomposed_l2 <dbl>